library(tictoc)
library(ggplot2)
library(dplyr)
library(data.table)
names_functions = list.files(here::here("functions"))
for (f in names_functions)
source(here::here("functions", f))
rm(f, names_functions)Export and plot results from MCMC output
Libraries, functions
Load “global” variables
parties <- load_parties()
states <- load_states()
regions <- load_regions()
states_regions <- load_dataland_states_regions()
n_parties_by_state <- load_n_parties_by_geography("state")
dates_campaign <- load_dates_election_campaign(year = 2024)
electoral_votes <- load_electoral_votes()
election_day <- load_election_day()Load priors -> visualize along with posterior distribution of \pi_T
priors <- readRDS(here::here("priors", "priors.Rds"))Generate results for different scenarios
scenarios <- load_scenarios()List to store plots
plts <- list()Define a function to generate csv output and plots for each scenario
gen_results <- function(plts, scen) {
print(scen)
out <- readRDS(here::here("model", paste0("mcmc_out_", scen, ".Rds")))
df_polls <- read.csv(here::here("data", paste0("df_polls_", scen, ".csv")))
df_polls$date <- as.Date(df_polls$date)
# make non-zero polls more visible in plots
df_polls$value = ifelse(df_polls$value == 0,
NA,
df_polls$value)
df_prior_ppi <- invert_alr_on_prior(priors[[scen]][["m_mmu_T"]])
# use data.table -> much, much quicker than tidyverse!
df_draws_ppi <- convert_draws_to_dt(
rstan::extract(out, pars = "ppi")[["ppi"]],
geographies = states,
parties = parties,
dates_campaign = dates_campaign)
df_draws_ppi_reg <- convert_draws_to_dt(
rstan::extract(out, pars = "ppi_reg")[["ppi_reg"]],
geographies = regions,
parties = parties,
dates_campaign = dates_campaign)
df_draws_ppi_nat <- convert_draws_to_dt(
rstan::extract(out, pars = "ppi_nat")[["ppi_nat"]],
geographies = "national",
parties = parties,
dates_campaign = dates_campaign)
tic("calc prob win election")
df_prob_win_election <- do.call(
"rbind",
lapply(
dates_campaign,
FUN = calc_prob_win_election,
df_draws_ppi = df_draws_ppi,
df_draws_ppi_nat = df_draws_ppi_nat,
states = states,
parties = parties,
electoral_votes = electoral_votes
)
)
toc()
tic("calc prob win states")
df_prob_win_states <- do.call(
"rbind",
lapply(
dates_campaign,
FUN = calc_prob_win_states,
df_draws_ppi = df_draws_ppi,
states = states,
parties = parties
)
)
toc()
# Export mean vote share and win probabilities to read
tic("calc mean vote states")
df_draws_ppi %>%
group_by(t, party, geography) %>%
summarise(mean_vote_share = mean(values)) %>%
rename(date = t, province = geography) %>%
mutate(party = tolower(party)) %>%
tidyr::pivot_wider(
names_from = "party",
values_from = "mean_vote_share",
names_glue = "{party}_{.value}") -> df_out_mean
toc()
df_prob_win_states %>%
mutate(party = tolower(party)) %>%
rename(province = geography) %>%
tidyr::pivot_wider(
names_from = "party",
values_from = "prob_win",
names_glue = "{party}_{.value}") -> df_out_prob_win_states
write.csv(
merge(
# only export until day of latest available poll
filter(df_out_mean, date <= max(df_polls$date)),
filter(df_out_prob_win_states, date <= max(df_polls$date)),
by = c("date", "province")
),
file = here::here(paste0("provincial_forecast_", scen, ".csv")),
row.names = FALSE
)
tic("calc mean vote share national")
df_draws_ppi_nat %>%
select(-geography) %>%
group_by(t, party) %>%
summarise(mean_vote_share = mean(values)) %>%
rename(date = t) %>%
mutate(party = tolower(party)) %>%
tidyr::pivot_wider(
names_from = "party",
values_from = "mean_vote_share",
names_glue = "{party}_{.value}") -> df_out_mean_nat
toc()
tic("transform win prob election for export")
df_prob_win_election %>%
mutate(party = tolower(party)) %>%
tidyr::pivot_wider(
names_from = "party",
values_from = "prob_win",
names_glue = "{party}_{.value}") -> df_out_prob_win_election
toc()
write.csv(
merge(
# only export until day of latest available poll
filter(df_out_mean_nat, date <= max(df_polls$date)),
filter(df_out_prob_win_election, date <= max(df_polls$date)),
by = "date"
),
file = here::here(paste0("national_forecast_", scen, ".csv")),
row.names = FALSE
)
# Plots
plot_prob_win_election(
df_prob_win_election,
election_day,
scen) -> plts[[scen]][["plt_prob_win_election"]]
tic("plot win prob election over time")
plot_prob_win_election_over_time(
df_prob_win_election,
scen) -> plts[[scen]][["plt_prob_win_election_over_time"]]
toc()
plot_prob_win_states(
df_prob_win_states,
election_day,
scen) -> plts[[scen]][["plt_prob_win_states"]]
plot_prob_win_states_over_time(
df_prob_win_states,
scen) -> plts[[scen]][["plt_prob_win_states_over_time"]]
plot_ppiT(
df_draws_ppi,
df_prior_ppi,
election_day,
n_geographies = length(states),
scen) -> plts[[scen]][["plt_ppiT"]]
plot_ppi(
df_draws_ppi,
filter(df_polls, scenario == scen),
n_geographies = length(states),
type_of_poll = "state",
plt_title_prefix = scen) -> plts[[scen]][["plt_ppi"]]
plot_ppi(
df_draws_ppi_reg,
filter(df_polls, scenario == scen),
n_geographies = length(regions),
type_of_poll = "regional",
plt_title_prefix = scen) -> plts[[scen]][["plt_ppi_reg"]]
plot_ppi(
df_draws_ppi_nat,
filter(df_polls, scenario == scen),
n_geographies = 1,
type_of_poll = "national",
plt_title_prefix = scen) -> plts[[scen]][["plt_ppi_nat"]]
rm(df_draws_ppi,
df_draws_ppi_reg,
df_draws_ppi_nat,
df_polls,
df_prior_ppi,
df_prob_win_election,
df_prob_win_states,
df_out_prob_win_election,
df_out_prob_win_states,
df_out_mean,
df_out_mean_nat,
out)
plts
}Loop over scenarios
for (scen in scenarios) {
plts <- gen_results(plts, scen)
}[1] "A"
calc prob win election: 206.99 sec elapsed
calc prob win states: 208.59 sec elapsed
calc mean vote states: 0.41 sec elapsed
calc mean vote share national: 0.06 sec elapsed
transform win prob election for export: 0.02 sec elapsed
plot win prob election over time: 0.02 sec elapsed
[1] "B"
calc prob win election: 283.73 sec elapsed
calc prob win states: 196.42 sec elapsed
calc mean vote states: 0.41 sec elapsed
calc mean vote share national: 0.07 sec elapsed
transform win prob election for export: 0.01 sec elapsed
plot win prob election over time: 0.04 sec elapsed
[1] "C"
calc prob win election: 257.78 sec elapsed
calc prob win states: 195.51 sec elapsed
calc mean vote states: 0.41 sec elapsed
calc mean vote share national: 0.07 sec elapsed
transform win prob election for export: 0 sec elapsed
plot win prob election over time: 0 sec elapsed
[1] "D"
calc prob win election: 264.97 sec elapsed
calc prob win states: 197.3 sec elapsed
calc mean vote states: 0.4 sec elapsed
calc mean vote share national: 0.08 sec elapsed
transform win prob election for export: 0 sec elapsed
plot win prob election over time: 0.01 sec elapsed
[1] "E"
calc prob win election: 232.93 sec elapsed
calc prob win states: 196.06 sec elapsed
calc mean vote states: 0.39 sec elapsed
calc mean vote share national: 0.04 sec elapsed
transform win prob election for export: 0.04 sec elapsed
plot win prob election over time: 0.02 sec elapsed